For this Seurat analysis, we used the following parameters:
pars <- as.data.frame(do.call("rbind", params))
colnames(pars) <- "parameters_used"
pars
Starting with this Seurat object from Mireille.
path <- "/fast/work/groups/cubi/milekm/mireille/de/rerun/downstream_analysis/"
sobj <- readRDS(file.path(path,params$path_to_object))
selected_cell_types <- sobj[[]] %>%
pull(predicted.id) %>%
unique()
# sample <- sobj[[]] %>%
# select(group,source_name) %>%
# mutate(sample=paste0(group,"_",source_name)) %>%
# select(sample)
# sobj <- AddMetaData(sobj, metadata = sample)
# cluster_metadata_3 <- sobj[[]] %>%
# mutate(age = str_extract(group, "^[0-9]+w")) %>%
# mutate(strain = str_extract(group, "w(E|SPF|)")) %>%
# mutate(time = str_extract(group, "[0-9]+days")) %>%
# mutate(arm = paste(age, strain, time, sep = "_")) %>%
# group_by(arm) %>%
# mutate(pseudoID = sprintf("ID%02d", as.numeric(as.factor(mouse)))) %>%
# select(sample, cell_type, group, mouse, age, location, time, strain,pseudoID) %>%
# distinct()
expr <- list()
for (cell_type in selected_cell_types) {
for (sample in unique(sobj[[]]$sample)) {
# sample <- unique(sobj[[]]$sample)[1]
# cell_type <- selected_cell_types[1]
cells <- Cells(sobj)[(sobj@meta.data$sample==sample & sobj$predicted.id==cell_type ) ]
cells <- cells[!is.na(cells)]
if (length(cells) > 1) {
expr[[paste0(cell_type,';',sample)]] <- rowSums(sobj@assays$RNA@counts[,cells])
}
}
}
for (sample in unique(sobj[[]]$sample)) {
cells <- Cells(sobj)[(sobj@meta.data$sample==sample )]
cells <- cells[!is.na(cells)]
expr[[paste0('all;',sample)]] <- rowSums(sobj@assays$RNA@counts[,cells])
}
sample_name <- sub(".*-","",sub(";.*","", sub(';','-',names(expr))))
colData<-data.frame(sample=names(expr),
cell_type=factor(sub(';.*','',names(expr))),
sample_name=sample_name,
group=sub(".*-","",sub("_","-",sub("_","-",sample_name))),
mouse=gsub("-","_",sub("_.*","",sub("_","-",sample_name))) ,
age=sub("_.*","",sub(".*-","",sub("_","-",sub("_","-",sample_name)))),
compartment=sub(".*_", "", sub("_[fpd].*", "", sample_name)),
time = sub(".*_", "", sub("days.*", "days", sample_name)),
strain = ifelse(grepl("12w",sample_name), "young",
ifelse(grepl("26w_SPF", sample_name), "aged",
ifelse(grepl("26w_nonSPF", sample_name),"immuno", NA))))
colData <- colData %>%
mutate(age_time_strain = paste0(age, "_", time, "_", strain)) %>%
group_by(age_time_strain) %>%
mutate(pseudoID = sprintf("ID%02d", as.numeric(as.factor(mouse))))%>%
ungroup()
counts <- do.call(cbind, expr)
Get number of reads per sample.
lengths <- colSums(counts)
df_lengths <- data.frame(sample = names(lengths),
n_reads = lengths) %>%
mutate(group = sub(".*-","",sub("_","-",sub("_","-",sub(".*;", "",sample)))),
celltype = sub(";.*", "", sample))
ggplot(df_lengths, aes(celltype,n_reads,fill=group))+
geom_boxplot(position=position_dodge(width=.5),width=.5, outlier.shape = 1, outlier.colour = "red")+
geom_point(position=position_dodge(width=.5),colour='grey30', shape=1, size=2) +
facet_wrap(~celltype, scales="free")
We run DESeq2 by including the pseudoID in the model. Here we assume that the age_time_location of the mouse is replicated in triplicate (pseudoID). We use all cells, there is not enough cells to look at subtypes.
# selected_cell_types <- c("Monocytes / Macrophages", "Neutrophils", "Myeloid Precursor", "T Cells", "B Cells", "Dendritic Cells", "all")
selected_cell_types <- c("all")
# selected_contrasts <- c("12wfx_5days_vs_12wfx_2days",
# "26wEfx_5days_vs_26wEfx_2days",
# "26wSPFfx_5days_vs_26wSPFfx_2days",
# "26wEfx_2days_vs_12wfx_2days",
# "26wSPFfx_2days_vs_12wfx_2days",
# "26wSPFfx_2days_vs_26wEfx_2days",
# "12wdist_5days_vs_12wfx_5days",
# "12wprox_5days_vs_12wfx_5days",
# "12wdist_5days_vs_12wprox_5days",
# "26wEdist_5days_vs_26wEfx_5days",
# "26wEprox_5days_vs_26wEfx_5days",
# "26wEdist_5days_vs_26wEprox_5days",
# "26wSPFdist_5days_vs_26wSPFfx_5days",
# "26wSPFprox_5days_vs_26wSPFfx_5days",
# "26wSPFdist_5days_vs_26wSPFprox_5days")
selected_contrasts <- c("12w_nonSPF_5days_fx_vs_12w_nonSPF_2days_fx",
"26w_nonSPF_5days_fx_vs_26w_nonSPF_2days_fx",
"26w_SPF_5days_fx_vs_26w_SPF_2days_fx",
"26w_nonSPF_2days_fx_vs_12w_nonSPF_2days_fx",
"26w_SPF_2days_fx_vs_12w_nonSPF_2days_fx",
"26w_SPF_2days_fx_vs_26w_nonSPF_2days_fx",
"12w_nonSPF_5days_distal_vs_12w_nonSPF_5days_fx",
"12w_nonSPF_5days_proximal_vs_12w_nonSPF_5days_fx",
"12w_nonSPF_5days_distal_vs_12w_nonSPF_5days_proximal",
"26w_nonSPF_5days_distal_vs_26w_nonSPF_5days_fx",
"26w_nonSPF_5days_proximal_vs_26w_nonSPF_5days_fx",
"26w_nonSPF_5days_distal_vs_26w_nonSPF_5days_proximal",
"26w_SPF_5days_distal_vs_26w_SPF_5days_fx",
"26w_SPF_5days_proximal_vs_26w_SPF_5days_fx",
"26w_SPF_5days_distal_vs_26w_SPF_5days_proximal")
# all_groups <- colData$group %>%
# unique()
# contrasts <- expand_grid(perturbation=all_groups, reference=all_groups) %>%
# filter(perturbation < reference) %>%
# mutate(contrast = paste0(perturbation,"_vs_",reference)) %>%
# pull(contrast)
res <- list()
for (celltype in selected_cell_types){
coldata_df <- colData %>%
dplyr::filter(cell_type %in% celltype)
counts_df <- subset(counts, select = coldata_df$sample)
if (sum(colSums(counts_df)) != 0){
dds <- DESeqDataSetFromMatrix(countData=as.matrix(counts_df),
colData=coldata_df,
design=~pseudoID + group)
contrasts <- selected_contrasts
sub_res <- list()
for (comparison in contrasts){
perturbation <- sub("_vs_.*", "", comparison)
reference <- sub(".*_vs_", "", comparison)
dds$group <- relevel(dds$group, ref = reference)
if (celltype != "all" & celltype != "all_cells"){
dds <- estimateSizeFactors(dds, type='poscounts')
dds <- DESeq(dds, test = 'LRT', reduced = ~1, useT=TRUE, minmu=1e-6, minReplicatesForReplace=Inf)
} else {
dds <- estimateSizeFactors(dds)
dds <- DESeq(dds)
}
if(params$shrinkage_type=="apeglm"){
sub_res[[comparison]] <- lfcShrink(dds, coef=paste0("group_",comparison), type=params$shrinkage_type, format="DataFrame") %>%
as.data.frame() %>%
tibble::rownames_to_column('gene_name') %>%
mutate(cell_type=celltype, contrast = comparison)
} else {
sub_res[[comparison]] <- lfcShrink(dds, contrast=c("group",perturbation, reference), type=params$shrinkage_type, format="DataFrame") %>%
as.data.frame() %>%
tibble::rownames_to_column('gene_name') %>%
mutate(cell_type=celltype, contrast = comparison)
}
}
}
res[[celltype]] <- do.call("rbind", sub_res) %>%
tibble::remove_rownames()
}
res_df <- do.call("rbind", res) %>%
tibble::remove_rownames()
for (comparison in unique(res_df$contrast)){
p <- res_df %>%
dplyr::filter(!is.na(padj), contrast == comparison) %>%
ggplot(aes(x=log2FoldChange,y=-log10(pvalue),color=padj < .05)) +
geom_point(size=.5, shape=1) +
geom_label_repel(data=res_df %>%
dplyr::filter(contrast == comparison,padj < .05),
aes(label=gene_name),
segment.size=.25,segment.alpha=.5,size=3,color='black',max.overlaps=40,
label.padding=0.05,label.size=NA) +
scale_color_manual(values=c('black','red')) +
theme_classic() +
theme(legend.position='none',
strip.background=element_blank())+
ggtitle(comparison)+
facet_wrap(~cell_type, ncol = 2, scales = "free")
print(p)
}
# for (comparison in unique(res_df$contrast)){
# p <- res_df %>%
# dplyr::filter(contrast == comparison) %>%
# ggplot(aes(x=baseMean,y=log2FoldChange,color=padj < .05)) +
# geom_point(size=.5, shape=1) +
# geom_label_repel(data=res_df %>%
# dplyr::filter(contrast == comparison, padj < .05),
# aes(label=gene_name),
# segment.size=.25,segment.alpha=.5,size=3,color='black',max.overlaps=30,
# label.padding=0.05,label.size=NA) +
# scale_color_manual(values=c('black','red')) +
# scale_x_continuous(trans='log10') +
# coord_cartesian(ylim =c(-4,4)) +
# theme_classic() +
# theme(legend.position='none',
# strip.background=element_blank())+
# ggtitle(comparison)+
# facet_wrap(~cell_type, ncol = 2)
# print(p)
# }
res_df %>%
dplyr::arrange(padj) %>%
dplyr::filter(!is.na(padj) & (padj < .05)) %>%
dplyr::mutate_if(is.numeric, signif, 2) %>%
DT::datatable(rownames=FALSE, extensions = 'Buttons', options = list(dom='frtBip', buttons = c('csv')))
dir.create("results_list_310124", recursive=T, showWarnings=T)
write.table(res_df,"results_list_310124/de_results_pseudoid_analysis.tsv", quote=F, sep="\t",row.names = F)
library(tmod)
# data(tmod)
#
# library(orthomapper)
# mousehum <- orthologs(10090, 9606)
# mousehum$h_symbol <- entrez_annotate(mousehum$T.9606)$SYMBOL
# mousehum$m_symbol <- entrez_annotate(mousehum$T.10090)$SYMBOL
#
# mousehum <- mousehum[order(mousehum$h_symbol),]
# rows_tmod <- which(mousehum$h_symbol %in% tmod$gv)
# mgenes <- mousehum$m_symbol[rows_tmod]
#
# list_tmod <- sapply(tmod$gs2gv, function(x) tmod$gv[x] )
# list_tmod <- sapply(list_tmod, function(x) x[order(x)])
# list_tmod <- sapply(list_tmod, function(x) x[which(x %in% mousehum$h_symbol)])
# list_tmod <- sapply(list_tmod, function(x) mousehum[which(mousehum$h_symbol %in% x), "m_symbol"])
# list_tmod <- sapply(list_tmod, function(x) which(mgenes %in% x))
#
# tmod_mm <- tmod
#
# tmod_mm$gv <- mgenes
# tmod_mm$gs2gv <- list_tmod
# save tmod mouse object
# saveRDS(tmod_mm, "tmod_mm.rds")
tmod_mm <- readRDS("tmod_mm.rds")
df2tmod <- function(df, gene_id_col=ncol(df), module_id_col=1, module_title_col=2) {
require(tmod, quietly=TRUE)
gene_ids <- df[[gene_id_col]]
module_ids <- df[[module_id_col]]
m2g <- tapply(gene_ids, module_ids, list)
df[[gene_id_col]] <- NULL
df <- df[!duplicated(df[[module_id_col]]), ]
colnames(df)[module_id_col] <- "ID"
colnames(df)[module_title_col] <- "Title"
makeTmod(modules=df, modules2genes=m2g)
}
df <- as.data.frame(msigdbr::msigdbr(species='Mus musculus'))
df <- df[ , c("gs_name", "gs_id", "gs_cat", "gs_subcat", "gene_symbol") ]
colnames(df) <- c("Title", "ID", "Category", "Subcategory", "GeneID")
msig <- df2tmod(df[!is.na(df$GeneID),], gene_id_col=ncol(df), module_id_col=2, module_title_col=1)
sel <- (msig$gs$Category %in% c("H")) | (msig$gs$Subcategory %in% c('CP:REACTOME','GO:BP','CP:KEGG'))
res <- split(res_df, f = res_df$contrast)
genes <- lapply(res, function(x) x %>%
filter(!is.na(padj)) %>%
arrange(pvalue))
lgenes <- lapply(genes, function(x) x$gene_name)
res.tmod <- lapply(lgenes, tmodCERNOtest, mset = msig[sel])
res.tmod.filtered <- lapply(res.tmod, function(x) subset(x, adj.P.Val < 1e-8 & AUC > .7))
pie <- lapply(genes, function(x) tmodDecideTests(x$gene_name, lfc = x$log2FoldChange,
pval = x$pvalue, lfc.thr=0,pval.thr=0.1, mset = msig[sel]))
pie <- lapply(pie, as.data.frame)
tmodPanelPlot(res.tmod.filtered, text.cex = .8, pie = pie, pie.style="rug", grid="at")
res <- split(res_df, f = res_df$contrast)
genes <- lapply(res, function(x) x %>%
filter(!is.na(padj)) %>%
arrange(pvalue))
lgenes <- lapply(genes, function(x) x$gene_name)
res.tmod <- lapply(lgenes, tmodCERNOtest, mset = tmod_mm)
res.tmod.filtered <- lapply(res.tmod, function(x) subset(x, adj.P.Val < 1e-6 & AUC > .7))
pie <- lapply(genes, function(x) tmodDecideTests(x$gene_name, lfc = x$log2FoldChange,
pval = x$pvalue, lfc.thr=0,pval.thr=0.1, mset = tmod_mm))
pie <- lapply(pie, as.data.frame)
tmodPanelPlot(res.tmod.filtered, text.cex = .8, pie = pie, pie.style="rug", grid="at")
sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-conda-linux-gnu (64-bit)
## Running under: Rocky Linux 8.7 (Green Obsidian)
##
## Matrix products: default
## BLAS/LAPACK: /data/cephfs-1/work/groups/cubi/users/milekm_c/miniconda/envs/R-fixed-biomart/lib/libopenblasp-r0.3.24.so; LAPACK version 3.11.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Berlin
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] tmod_0.50.13 stringr_1.5.0
## [3] ggrepel_0.9.3 DESeq2_1.40.2
## [5] SummarizedExperiment_1.30.2 Biobase_2.60.0
## [7] MatrixGenerics_1.12.2 matrixStats_1.0.0
## [9] GenomicRanges_1.52.0 GenomeInfoDb_1.36.1
## [11] IRanges_2.34.1 S4Vectors_0.38.1
## [13] BiocGenerics_0.46.0 cowplot_1.1.1
## [15] ggplot2_3.4.3 gtools_3.9.4
## [17] tidyr_1.3.0 dplyr_1.1.3
## [19] SeuratObject_4.1.4 Seurat_4.4.0
##
## loaded via a namespace (and not attached):
## [1] RColorBrewer_1.1-3 rstudioapi_0.15.0 jsonlite_1.8.7
## [4] magrittr_2.0.3 spatstat.utils_3.0-3 farver_2.1.1
## [7] rmarkdown_2.25 zlibbioc_1.46.0 vctrs_0.6.3
## [10] ROCR_1.0-11 spatstat.explore_3.2-3 RCurl_1.98-1.12
## [13] S4Arrays_1.0.4 htmltools_0.5.6 sass_0.4.7
## [16] sctransform_0.4.0 parallelly_1.36.0 KernSmooth_2.23-22
## [19] bslib_0.5.1 htmlwidgets_1.6.2 ica_1.0-3
## [22] plyr_1.8.9 plotly_4.10.2 zoo_1.8-12
## [25] cachem_1.0.8 igraph_1.5.1 mime_0.12
## [28] lifecycle_1.0.3 pkgconfig_2.0.3 Matrix_1.6-1.1
## [31] R6_2.5.1 fastmap_1.1.1 GenomeInfoDbData_1.2.10
## [34] fitdistrplus_1.1-11 future_1.33.0 shiny_1.7.5
## [37] digest_0.6.33 colorspace_2.1-0 patchwork_1.1.3
## [40] tensor_1.5 irlba_2.3.5.1 crosstalk_1.2.0
## [43] labeling_0.4.3 progressr_0.14.0 fansi_1.0.4
## [46] spatstat.sparse_3.0-2 httr_1.4.7 polyclip_1.10-6
## [49] abind_1.4-5 compiler_4.3.1 withr_2.5.1
## [52] BiocParallel_1.34.2 gplots_3.1.3 MASS_7.3-60
## [55] DelayedArray_0.26.6 caTools_1.18.2 tools_4.3.1
## [58] lmtest_0.9-40 beeswarm_0.4.0 msigdbr_7.5.1
## [61] httpuv_1.6.11 future.apply_1.11.0 goftest_1.2-3
## [64] glue_1.6.2 nlme_3.1-163 promises_1.2.1
## [67] grid_4.3.1 Rtsne_0.16 cluster_2.1.4
## [70] reshape2_1.4.4 generics_0.1.3 gtable_0.3.4
## [73] spatstat.data_3.0-1 data.table_1.14.8 XVector_0.40.0
## [76] sp_2.1-0 utf8_1.2.3 spatstat.geom_3.2-5
## [79] RcppAnnoy_0.0.21 RANN_2.6.1 pillar_1.9.0
## [82] babelgene_22.9 later_1.3.1 splines_4.3.1
## [85] lattice_0.21-9 survival_3.5-7 deldir_1.0-9
## [88] tidyselect_1.2.0 locfit_1.5-9.8 miniUI_0.1.1.1
## [91] pbapply_1.7-2 knitr_1.44 gridExtra_2.3
## [94] scattermore_1.2 xfun_0.40 pheatmap_1.0.12
## [97] DT_0.28 stringi_1.7.12 tagcloud_0.6
## [100] lazyeval_0.2.2 yaml_2.3.7 evaluate_0.22
## [103] codetools_0.2-19 tibble_3.2.1 cli_3.6.1
## [106] uwot_0.1.16 xtable_1.8-4 reticulate_1.32.0
## [109] munsell_0.5.0 jquerylib_0.1.4 Rcpp_1.0.11
## [112] globals_0.16.2 spatstat.random_3.1-6 png_0.1-8
## [115] XML_3.99-0.14 parallel_4.3.1 ellipsis_0.3.2
## [118] bitops_1.0-7 listenv_0.9.0 plotwidgets_0.5.1
## [121] viridisLite_0.4.2 scales_1.2.1 ggridges_0.5.4
## [124] crayon_1.5.2 leiden_0.4.3 purrr_1.0.2
## [127] rlang_1.1.1